home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
MacHack 1999
/
MacHack 1999.toast
/
The Hacks
/
YRTP
/
cgi-bin
/
webparser.pl
Wrap
Perl Script
|
1999-06-25
|
3KB
|
131 lines
#!/usr/bin/perl
print "Content-type: text/html\n\n";
use LWP::Simple;
require 'lock.pl';
# YRTP: the Perl bit
# P.D. Magnus
# June 1999
# get the query
#
# _parse_query(QUERY)
#   QUERY   - the raw, still percent-encoded query string (may be undef/empty)
#   returns - the list (URL, JUMP):
#               URL   the page to fetch, always carrying an "http:" scheme;
#                     falls back to http://www.fecundity.com/ when the query
#                     names no (or an empty) url
#               JUMP  1 when the query contains cue=jump, '' otherwise
sub _parse_query {
    my ($query) = @_;
    $query = '' unless defined $query;

    my ($url, $jump) = ('', '');

    # parse the query: split into name=value pairs BEFORE percent-decoding,
    # so that an encoded '&' or '=' inside the url value stays part of the url
    foreach my $pair (split /&/, $query) {
        my ($name, $value) = split /=/, $pair, 2;
        next unless defined $name && defined $value;
        $value =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/ge;

        if ($name eq 'url') {
            # determine the url (the first non-empty one wins)
            $url = $value if $url eq '';
        }
        elsif ($name eq 'cue') {
            # determine the cue type
            $jump = 1 if $value eq 'jump';
        }
    }

    if ($url eq '') { $url = "http://www.fecundity.com/"; }

    # The scheme test is anchored: "http:" appearing somewhere later in the
    # value must not let another scheme (file:, ftp:, ...) reach the fetcher.
    if ($url !~ m|^http:|i) { $url = "http://" . $url; }

    return ($url, $jump);
}

($urline, $jump) = _parse_query($ENV{'QUERY_STRING'});
# ---------------------------------------------------------------------------
# Fetch the requested page and answer in one of two ways:
#   * a frameset page is echoed back with every SRC="..." pointed at
#     webparser.pl again, so each frame gets parsed in turn;
#   * any other page is boiled down to a string of word characters and marker
#     letters, which is spliced, together with the list of its links, into
#     the rawcode template as JavaScript variables.
# ---------------------------------------------------------------------------

# _link_root(URL): the prefix that relative links on URL are resolved against,
# returned WITHOUT a trailing slash. When the last path component looks like
# a document (.htm, .html, .pl) that component is dropped; otherwise the URL
# is taken to name a directory.
sub _link_root {
    my ($url) = @_;
    my $root = $url;
    $root =~ s|[?#].*||s;                       # query/fragment are not path
    # look only at the path, so a dot in the host name cannot look like ".pl"
    my ($path) = $root =~ m|^\w+://[^/]*(/.*)$|s;
    if (defined $path && $path =~ m/\.(?:html?|pl)$/i) {
        $root =~ s|/[^/]*$||;
    }
    $root =~ s|/+$||;                           # callers add the single '/'
    return $root;
}

# _absolutize(ROOT, LINK): turn a relative LINK into an absolute url.
# A LINK starting with '/' is resolved against the scheme://host part of
# ROOT; anything else is appended to ROOT itself.
sub _absolutize {
    my ($root, $link) = @_;
    if ($link =~ m|^/|) {
        my ($site) = $root =~ m|^(\w+://[^/]+)|;
        return (defined $site ? $site : $root) . $link;
    }
    return $root . '/' . $link;
}

# _url_escape(TEXT): percent-encode everything except unreserved characters,
# so TEXT can travel as one query-string value.
sub _url_escape {
    my ($text) = @_;
    $text =~ s/([^A-Za-z0-9\-_.~])/sprintf('%%%02X', ord($1))/ge;
    return $text;
}

# _html_escape(TEXT): make TEXT safe to print as HTML text.
sub _html_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# _js_escape(TEXT): make TEXT safe inside a single-quoted JavaScript string
# that sits in an HTML page; the string's value is unchanged.
sub _js_escape {
    my ($text) = @_;
    $text =~ s/([\\'])/\\$1/g;
    $text =~ s/</\\x3C/g;                       # never emit a literal "</script"
    return $text;
}

# _frame_src(ROOT, CUE, SRC): replacement text for one SRC="..." attribute of
# a frameset page, routing the frame back through webparser.pl.
sub _frame_src {
    my ($root, $cue, $src) = @_;
    # absolute sources are passed through; only relative ones get the root
    $src = _absolutize($root, $src) unless $src =~ m|^http://|i;
    return 'src = "webparser.pl?url=' . _url_escape($src) . '&cue=' . $cue . '"';
}

# _copy_template_until(MARKER): print lines from INFILE up to and including
# the first one matching MARKER; stops quietly at end of file so a template
# without the marker cannot hang the script.
sub _copy_template_until {
    my ($marker) = @_;
    while (defined(my $row = <INFILE>)) {
        print $row;
        last if $row =~ $marker;
    }
}

# get() yields undef when the fetch fails; treat that as an empty page
my $fetched = get($urline);
$fetched = '' unless defined $fetched;

PAGE: for ($fetched) {
    # figure out the root used for converting relative to absolute url's
    $root = _link_root($urline);

    # doctor the url
    $urline =~ s|http:|yrtp:|i;

    # check for frames
    if (m|<frame|i) {
        s|NOFRAMES.*/NOFRAMES||gsi;

        # absolutize links, escape them, and tag them with the cue type
        my $cue = $jump ? 'jump' : 'delay';
        s{SRC\s*=\s*"(.*?)"}{_frame_src($root, $cue, $1)}gie;

        # doctor title
        my $frame_title = _html_escape($urline);
        s|<TITLE>.*</TITLE>|<title>$frame_title</title>|gi;

        # print out the altered page
        print $_;
        last PAGE;
    }

    # convert to lower-case
    $_ = lc;

    # tabulate links
    @links = ("http://www.fecundity.com/codeweb");
    push @links, m|href="(.*?)"|g;

    # absolutize: absolute links lose their "http://", relative ones are
    # resolved against the root
    foreach my $link (@links) {
        if ($link =~ m|^http://|) {
            $link =~ s|^http://||;
        } else {
            $link = _absolutize($root, $link);
        }
    }

    # From here on the page text is reduced to word characters plus marker
    # letters. NOTE(review): the meaning of the markers is defined by the
    # script inside the rawcode template, which is not visible here.

    # eliminate title
    s|title.*/title||gs;
    # include images with alt's
    s|<img.*?alt="(.+?)".*?\Q>\E|img_$1_|g;
    # convert links (L opens, M closes)
    s|<a.*?\Q>\E|L|g;
    s|</a\Q>\E|M|g;
    # convert special characters
    s|"||g;
    s|&|and|g;
    s| |_|g;
    # headlines (h4 has no marker of its own; N closes any heading)
    s|<h1>|X|g;
    s|<h2>|Y|g;
    s|<h3>|Z|g;
    s|<h5>|W|g;
    s|</h\d>|N|g;
    # remaining tags
    s|<.*?\Q>\E||gs;
    # shuck out whitespace (and every other non-word character)
    s|\W||g;
    # convert numbers: wrap each run in A...j, then 1-9 become a-i and 0 k
    s|(\d+)|A$1j|g;
    tr/1-90/a-ik/;

    # output the java-ized file with the appropriate data inside
    my $template = $ENV{'DOCUMENT_ROOT'}.'/codeweb/rawcode'.($jump?'_j':'').'.html';
    open(INFILE, '<', $template) || die "webparser: cannot open $template: $!";
    # lock/unlock come from lock.pl (not shown); the '&' is kept because
    # "lock" is also the name of a Perl builtin
    &lock(INFILE,0);

    # first template line, then our title, then everything up to the marker
    $line = <INFILE>;
    print $line if defined $line;
    print "<TITLE>"._html_escape($urline)."</TITLE>\n";
    _copy_template_until(qr/Signal Code/);

    # spit the output ('B' stands in for an empty page)
    $line = qq{var output = '$_';\n};
    $line =~ s|''|'B'|;
    print $line;

    # spit the links -- all of them, including the last one
    print qq{var outlink = new Array();\n};
    for my $n (0 .. $#links) {
        my $js_link = _js_escape($links[$n]);
        print qq{outlink[$n] = '$js_link';\n};
    }

    _copy_template_until(qr/<!-- Never/);

    &unlock(INFILE);
    close (INFILE);
}